Examen visión computacional¶
Instrucciones¶
- Revisa el dataset que se incluye en las celdas siguientes.
- En base a este, crea un clasificador que pueda decir el tipo de coche y su color.
- Llena las celdas de código como se va solicitando.
- Al terminar, sube el notebook con todas las celdas ejecutadas a canvas.
Imports¶
import numpy as np
import cv2
import os
import math
import torch
import torch.nn.functional as F
from torch import optim, nn, utils, Tensor
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.dataset import random_split
import pandas as pd
import torchmetrics
import torchvision
from torchvision import transforms
from torchvision.datasets import ImageFolder
import matplotlib.pyplot as plt
# Local settings
# importante cambiar path en caso de querer probar localmente
# Dataset roots, tilde-expanded so the paths work regardless of cwd.
# importante cambiar path en caso de querer probar localmente
ABS_PATH_DATASET = os.path.expanduser(
    "~/src/tec/semestre_7_local/ds_ai_ml_models/DL_Data_sets/DL_Data_sets-main/cars/"
)
ABS_PATH_MANUAL_TESTING_DATASET = os.path.expanduser(
    "~/src/tec/semestre_7_local/ds_ai_ml_models/DL_Data_sets/DL_Data_sets-main/cars-manual-testing/"
)
# Pick the fastest available accelerator: CUDA > Apple MPS > CPU.
cuda_available = torch.cuda.is_available()
mps_backend = getattr(torch.backends, "mps", None)
mps_available = mps_backend is not None and torch.backends.mps.is_available()

if cuda_available:
    device, accelerator, devices = torch.device("cuda"), "gpu", 1
elif mps_available:
    device, accelerator, devices = torch.device("mps"), "mps", 1
else:
    device, accelerator, devices = torch.device("cpu"), "cpu", None

# Pinned host memory only speeds up host->device copies on CUDA.
pin_memory = cuda_available

print(f"Using device={device} (cuda={cuda_available}, mps={mps_available}), accelerator={accelerator}, devices={devices}")
Using device=mps (cuda=False, mps=True), accelerator=mps, devices=1
Descarga del dataset¶
!curl -sSL https://www.kaggle.com/api/v1/datasets/download/julichitai/multilabel-small-car-and-color-dataset -o ./cars_multilabel.zip
!unzip -q cars_multilabel.zip -d ~/src/tec/semestre_7_local/ds_ai_ml_models/DL_Data_sets/DL_Data_sets-main/cars
!rm cars_multilabel.zip
!ls $ABS_PATH_DATASET
matiz black matiz red rio blue tiggo black tiggo red matiz blue rio black rio red tiggo blue
!ls $ABS_PATH_DATASET/matiz\ black
000001.jpg 000002.jpg 000003.jpg 000004.jpg 000005.jpg 000006.jpg 000007.jpg 000008.jpg 000009.jpg 000010.jpg 000011.jpg 000012.jpg 000013.jpg 000014.jpg 000015.jpg 000016.jpg 000017.jpg 000018.jpg 000019.jpg 000020.jpg 000021.jpg 000022.jpg 000023.jpg 000024.jpg 000025.jpg 000026.jpg 000027.jpg 000028.jpg 000029.jpg 000030.jpg 000031.jpg 000032.jpg 000033.jpg 000034.jpg 000035.jpg 000036.jpg 000037.jpg 000038.jpg 000039.jpg 000040.jpg 000041.jpg 000042.jpg 000043.jpg 000044.jpg 000045.jpg 000046.jpg 000047.jpg 000048.jpg 000049.jpg 000050.jpg 000051.jpg 000052.jpg 000053.jpg 000054.jpg 000055.jpg 000056.jpg 000057.jpg 000058.jpg 000059.jpg 000060.jpg 000061.jpg 000062.jpg 000063.jpg 000064.jpg 000065.jpg 000066.jpg 000067.jpg 000068.jpg 000070.jpg 000071.jpg 000072.jpg 000073.jpg 000074.jpg 000076.jpg 000077.jpg 000081.jpg 000082.jpg 000083.jpg 000084.jpg 000085.jpg 000094.jpg 000099.jpg 000100.jpg 000101.jpg 000102.jpg 000105.jpg 000108.jpg 000112.jpg 000113.jpg 000125.jpg 000186.jpg 004685d0872defb3755b42a39f374a8e3e96c3d2.png 01cacfae3feffcb3d019f906497242a21c625858.png 030215640ea6a48c30ce6dfec047486daae6be8c.png 0566605b35c9c7e84792a70747c7b593bca97b21.png 05725e7668a1dc5f0b33648fdb6ee8bc.png 076871215f8de1752d0776bece3d547e0c3c2990.png 08065d7ba0fddb3b7b021a43bd55804b1711786a.png 08afc7aee91e68cbe86311340d3752e3.png 0f1f3625316794d1d2f60770e43eeb21f3f75786.png 10ecbd57346e5c638071352bc21c5e29440d03c9.png 13449f0bea2a9cedd905f8e140a2adfb652db7b2.png 169ad3e8bf4e1235df61a47971b47bdc.png 16f9eac46b66fd41e31368a981b6b945308d77c0.png 177a823220a56625542c3a550daec9e2c66c4aef.png 197aa1e3f4d2fc14852d441718df060e.png 1e0086885e7cccf8341658ebdf4b27ba9fd0c355.png 1e6a20a78e76c85a6ad545ede0f1766b9bd7459f.png 1f7747ddbef6302a318315c63ae5d803c794e595.png 203ae8e92d38a1476b4a5a93cd8fb2c416ca3b69.png 2165093b5686b62309c7290e35c86e094696d6fb.png 2215dacb7bec8ec0767e36119e6424be.png 25b4786b9cfe42e3141f1cea900ec962e42de56a.png 28603f16dc931fc35e46f18ba48a6e25c550dbf9.png 
2a00000179ed6b0d8ea71aab283545974bcc.png 2a00000179efb6264a37f1edc06412d3ae96.png 2a0000017a006362a784769d35eb81a5a21c.png 2a0000017a0206d0bb48fb123b3fb7da45d1.png 2a0000017a0a9cf8af0cc089f4766d96f1ea.png 2a0000017a0cc9c0e49d437fa30725744c27.png 2a0000017a0e7b63e8158ba07e8b346bbe79.png 2a0000017a193885652f06b4edf4bd95e197.png 2a884d73e53b56fbbc2cea45eb281776.png 2ae07662150bd52cf9c702bd63cdd629.png 2c2bbe596a3cb0e37d32a5f4338ef5b2e61810a1.png 2d000d5899bfe225fccdd1cb32a46318_sr.png 2d31b1411357835256d01d14bd00bb2e13b1b660.png 2d78db60d1e46a087f6a4e174e349bfda5ab5524.png 31edbeeb90b87266abaf54c7b227f52c4fe59713.png 33e78b416574915f162866c795b0436ade360285.png 3498bd8106da059df1bd99e234b13a9d70d4fcd2.png 39d86724b0fb446ee8e61f9fe2882086dae735c1.png 3a92939b32afd6f469a3aecc79e5f1c03ca1d157.png 3b027cca1df1a7806c07881650cc16d5cdddc8bb.png 3f0cad75aa3a35107fa1b04fd60be061.png 3f9d6901551b9b455ef01a4f5ebbdf1e8207fecb.png 3fb9de18637440f89755160ebc4ad1768155c523.png 4036bf3af49db65bea3459e7aab5fd0abb0f7f4f.png 40c43857390029328336cef684d685bb793a0e38.png 42e79ef85f0342ec04e6f2d9acab50ec555edc71.png 4456de86d23932efa1c1296c0096890dd6b7de41.png 44f16a7561b9f3a6a274b96ee501fb1f.png 48dc3046b57d7a7d9f433249972c03080f07f7ab.png 4931d3fd903fcaa6902977af95fd154666c3afe1.png 4b80bb029408554dbd438662d823b9d6a3128e3b.png 4d6dafd1fab82a40661ca3f9c278c4c0.png 4dd80bf9b477eda24dc1e8ffaf87d74c.png 4f64d41fd138ad5f1f4cd59d7cd3aa53a0aeddf9.png 539cd86c6d85544db20300fbf6eb643d69c3738e.png 542c5b7c89bbc79eb62e31424a11bd0b.png 574ad54196dea84fac5d2f8bfd83a2e3f303d576.png 57ab289fb6ee116b24957dede31509ea.png 581babd2d16e09f1533c488d7b2af1a9747c620d.png 59582d8f154dff7c3fe18c7b5c938e26001444a7.png 5ada97601b178f6d9a02e73d204a26c88d439418.png 5c87863079c9be45cb550b2bc91ca18471843a83.png 5d3e4b9ca0ddb7104e8d926d6407072a93b282af.png 5d992544151cba201b43955139d30c22025971fd.png 5da169abb6c889438fec5b1afc5656d6a103f41a.png 5e17b0bf3186e43dc03daef88b77d431.png 
5e40729440aa762300da0bcdd11633534a10cc8a.png 5eb45504a687f89f716cb4693011689ccb84b4e0.png 5f381dbae92f5681b09a03241ae948d10811c4eb.png 615cc8de5ad8bac96962915d50d24608.png 632c3ca7787631a249ac3daff256844b496d6ce1.png 66f8bf6c6c67a390e766ab9b5c473d13.png 679b2903a61e0112da05e60b74a8f2a6bc788fb2.png 6a44db030d669ffe655bc9d7e3d583851e361988.png 6c22d7ee08641d80a2c529947e8272758b832f96.png 6fb3a0311eaca9058e958fe50a61ab11.png 7057d30eb94c1d0994301fc32a89104ef0d78085.png 70eaf283c7a1d288c7989a7c2a885fdc2e91997a.png 7862a463b3da7ecae6cc16b197ff6fc22c6ee6bf.png 7a3819bc85e7b49ccc4d4cdb5f454e80c70eafca.png 7ade428414ccc9f77963c00e142b5a3d61ca3618.png 81b1310651ffaaeed79110e9bcc7b7c57c6ad0de.png 841f6f5d04167f922447d8635d99c83f7154fdf1.png 84bdd4c54d3b1a6e80a155bdad1c01d698a2dd21.png 857ffd6ad885fa349bc4615c750ea5c85f0527fc.png 87ae698de4efcce621fcf7450a2ad6ab9bb1f04a.png 89ec405740b7e3ae81f0942bb7f9730c72229b0c.png 8cac04a5ffc67914b8298ba01d6be45d8f5f3b12.png 8df14ce4a45aeadd065170a3578a9be01125d279.png 8e2f9c0b61fcac98b47e4b9575cb3c9883a11c43.png 8f327da21319e36516d5c889a4e0f1a9.png 93b1982ab8920535e71062a8b1cc28256f573563.png 93dcbdc94b57266bac92827c6005b9b2.png 95db7d057be6a1dc2bcdb4b66dc86c1f80f1b846.png 970f595cae2c97f5f279b88509448b1d.png 99edf034674c8151c84be90d19bfec61c6c72255.png 9b87866555f4936846eab149e05458c1bae63656.png a18a7ba32bcf88d3ef54f5ef54c22dd5eba97c81.png a26c18fdcfed101758e45d5704a001a6c23a1084.png a29281de40bcb98752247ae4d1b64482c65481e2.png a8e4ea0431fc0700f936a86cfd96a7a6.png ab2214d42bc082f94ea83379c1eec30ad957fb84.png b296ecf29840e105e9b40464e6c284972c9554a4.png b51e78365fc3cf6d20fd89f5dc1507ea32baaced.png b8d6e7c7bd105977732caec334c7a6f8.png b8de5f970ebd938e545f21f990f8258eee1211c9.png bcfc567b0804e656b2426d9ce26f2ae9dd6e4c76.png c2babb71e0f86a264578811e7ac2bfdcb81476f3.png c377fd92b98bf949e033f875c561afb6ffa1405a.png c47842153c610d86b23fe0e90ff431d2df0cc908.png c4be2e0c73f8be8e2e8518ce664b34f6396149ca.png 
c70bfac2fa1a97df83b22d61463e57901dec8a79.png c72536ff1adf24e453190f056b38f9ee35983151.png cb2050530923bd75daa74e36d8f7430272433132.png cbf6b173ee2d4a86578d489eba72fefa.png cdd3d54e9302207f19fcd23014bbc582c82709dd.png cf18dc1eb49dde59b0d82c7b8f5df22d.png cf7d8d275d2b51fcf6ddbb51d493acdc46495f2d.png d06104e169b4d38aaae429a9eb49715a74d39390.png d14c594e5deeb772d368aa549532205b1a5ca632.png d18783e81f9e9eaedf4975852c2721e1579eb5a0.png d6d9e6a194cb813d56da1779ae9582b6df704876.png d7f4568956271d8a5b095f218ca12020b371a399.png d8a426fb66d29b713a40a45b3aed8896c984abdc.png de0d3a8809e650ac901a121cd07edc2be887047b.png e040bd5a25e20b813016c6a504771a4a81cd6be2.png e0ef59ae84c809aba913548146e51735b53c463a.png e3605bb29350f4a65ee33dc027398608256396fe.png e709958b51b06d63748727d08cb4a6ed34f96429.png e8cdb685caeff4ef7cf2b7a309aa51cdcac2cda6.png e922eb2aa27ea4c4cbe93b3ea377885b9540fb6b.png ea98c79ce8e331d282dff7f44bc32de00f648f5a.png eb0634e709598052ec962a8e11b4ce531ceb7d28.png ee096d9172d492e6864e153d80e70f0a96ead074.png ee752cc92e2067fa92c813219510f7513144e330.png f10e0850b6edbee34b7ed2d0a676acb8a80b4134.png f1869e20122a7efa362797ed9fca3718.png f3d55fb02306f647c109b5db9c10d4d837ba8cde.png f59c2e05290e91e9a99a9141c3e97903a2660617.png f772a64533f2e720ac11ee84f27fb259.png ff407d7b4540186daddb60e3232930c7.png
# Show one sample image per class to eyeball the dataset.
# Only directories count as classes: os.listdir can also return stray files
# (e.g. .DS_Store), which the original code counted as classes and then
# crashed on when it tried to list/read them. Listings are sorted so the
# class count order and the chosen sample are deterministic across runs.
class_names = sorted(
    d for d in os.listdir(ABS_PATH_DATASET)
    if os.path.isdir(os.path.join(ABS_PATH_DATASET, d))
)
num_classes = len(class_names)
print(f"Number of classes in dataset: {num_classes}")

cols = 5
rows = math.ceil(num_classes / cols)
fig, axes = plt.subplots(rows, cols, figsize=(15, 8))
axes = axes.flatten() if num_classes > 1 else [axes]
for i, class_name in enumerate(class_names):
    class_path = os.path.join(ABS_PATH_DATASET, class_name)
    img_name = sorted(os.listdir(class_path))[0]
    img_path = os.path.join(class_path, img_name)
    img = cv2.imread(img_path)
    # OpenCV decodes to BGR; matplotlib expects RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    axes[i].imshow(img)
    axes[i].set_title(class_name, fontsize=10)
    axes[i].axis('off')
# Blank out unused grid cells.
for j in range(i + 1, len(axes)):
    axes[j].axis('off')
plt.show()
Number of classes in dataset: 9
Se puede ver que hay algunos modelos de coches un poco viejos, pero al inspeccionar manualmente también vi que hay modelos recientes, así que se podría decir que en una misma clase van a caber dos o más generaciones del mismo modelo; veremos cómo maneja esto la red.
def analyze_dataset(path):
    """Print per-class image counts and summary stats for an ImageFolder-style
    directory (one subdirectory per class).

    Args:
        path: Root directory whose immediate subdirectories are the classes.

    Returns:
        dict mapping class name -> number of image files, so the counts can
        also be reused programmatically (the printed report is unchanged).
    """
    class_counts = {}
    total_images = 0
    # Sorted so the report order is deterministic across filesystems.
    for class_name in sorted(os.listdir(path)):
        class_path = os.path.join(path, class_name)
        if not os.path.isdir(class_path):
            continue  # skip stray files like .DS_Store
        num_images = len([f for f in os.listdir(class_path)
                          if f.lower().endswith(('.png', '.jpg', '.jpeg', '.ppm'))])
        class_counts[class_name] = num_images
        total_images += num_images
    if not class_counts:
        # Guard the ZeroDivisionError the original hit on an empty root.
        print("No class directories found.")
        return class_counts
    average_images_per_class = total_images / len(class_counts)
    for class_name, count in class_counts.items():
        print(f"Class '{class_name}': {count} images")
    print(f"\nTotal number of images: {total_images}")
    print(f"Average number of images per class: {average_images_per_class:.2f}")
    print(f"Class with least images: {min(class_counts, key=class_counts.get)} ({min(class_counts.values())} images)")
    print(f"Class with most images: {max(class_counts, key=class_counts.get)} ({max(class_counts.values())} images)")
    return class_counts
def sample_image_dims(path):
    """Print width/height statistics over every readable image under *path*.

    Args:
        path: Root directory with one subdirectory per class.
    """
    files = []
    for class_name in os.listdir(path):
        class_path = os.path.join(path, class_name)
        if os.path.isdir(class_path):
            # Only recognised image extensions: class folders can also hold
            # non-image files, which would otherwise poison the stats.
            files.extend(
                os.path.join(class_path, f)
                for f in os.listdir(class_path)
                if f.lower().endswith(('.png', '.jpg', '.jpeg', '.ppm'))
            )
    dims = []
    for img_path in files:
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None (no exception);
            # the original then crashed on img.shape for unreadable files.
            continue
        dims.append((img.shape[1], img.shape[0]))  # (width, height)
    if not dims:
        print("\nNo readable images found.")
        return
    dims = np.array(dims)
    print(f"\nImage size statistics:")
    print(f"Width: min={dims[:,0].min()}, max={dims[:,0].max()}, mean={dims[:,0].mean():.1f}, std={dims[:,0].std():.1f}")
    print(f"Height: min={dims[:,1].min()}, max={dims[:,1].max()}, mean={dims[:,1].mean():.1f}, std={dims[:,1].std():.1f}")
# Report class balance and raw image-size spread of the training dataset.
analyze_dataset(ABS_PATH_DATASET)
sample_image_dims(ABS_PATH_DATASET)
Class 'matiz black': 235 images Class 'rio black': 306 images Class 'matiz red': 346 images Class 'rio red': 431 images Class 'tiggo black': 286 images Class 'matiz blue': 334 images Class 'rio blue': 262 images Class 'tiggo red': 262 images Class 'tiggo blue': 273 images Total number of images: 2735 Average number of images per class: 303.89 Class with least images: matiz black (235 images) Class with most images: rio red (431 images) Image size statistics: Width: min=138, max=5760, mean=686.3, std=442.8 Height: min=124, max=3840, mean=472.9, std=300.7
Se puede ver que las imágenes están bastante balanceadas en cuanto a cantidad en cada clase, mas en tamaño no mucho, varían mucho en tamaño
Carga de imágenes en dataloaders (15 puntos)¶
TO DO: Construye los dataloaders necesarios, con las transformaciones adecuadas, y muestra un batch. Construye las etiquetas de las imágenes para poder hacer clasificación multi-etiqueta.
# Training-time augmentation: resize slightly larger than the final crop so
# RandomCrop adds translation jitter, then flip/rotate/colour-jitter.
# Normalize maps [0, 1] pixels to [-1, 1] on every channel.
train_transform = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.RandomCrop((128, 128)),
    transforms.RandomHorizontalFlip(.5),
    transforms.RandomRotation(20),
    # jitter (especially hue) is kept small: colour is part of the label here
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
# Deterministic eval pipeline: same final size and normalization, no randomness.
test_transform = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.CenterCrop((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
añadí el random horizontal flip ya que en este tipo de imágenes no importa mucho si están espejeadas; de hecho, enriquece al modelo porque es como si el coche estuviera tomado desde otro punto de vista. También agregué una random rotation ligera para más variedad de encuadres.
import lightning as L
class CarDataModule(L.LightningDataModule):
    """LightningDataModule for the cars ImageFolder dataset.

    Splits the directory 70/15/15 into train/val/test with a fixed seed, and
    applies ``train_transform`` to the training split and ``test_transform``
    to the validation/test splits.
    """

    def __init__(self, data_dir, batch_size=32, num_workers=4, train_transform=None, test_transform=None, pin_memory=False):
        super().__init__()
        self.data_dir = os.path.expanduser(data_dir)
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.train_transform = train_transform
        self.test_transform = test_transform
        self.pin_memory = pin_memory
        # Eager setup so num_classes/classes are usable immediately,
        # before a Trainer ever calls setup().
        self.setup()

    def setup(self, stage=None):
        # Two ImageFolder views over the same directory, one per transform.
        # random_split subsets share their underlying dataset object, so the
        # original `self.val.dataset.transform = self.test_transform` also
        # silently switched the *training* split to the eval transform.
        # Splitting two datasets with the same seeded generator produces
        # identical index partitions, letting each split keep its transform.
        train_base = ImageFolder(self.data_dir, transform=self.train_transform)
        eval_base = ImageFolder(self.data_dir, transform=self.test_transform)
        total_size = len(train_base)
        train_size = int(.7 * total_size)
        val_size = int(.15 * total_size)
        test_size = total_size - train_size - val_size
        sizes = [train_size, val_size, test_size]
        self.train, _, _ = random_split(
            train_base, sizes, generator=torch.Generator().manual_seed(42)
        )
        _, self.val, self.test = random_split(
            eval_base, sizes, generator=torch.Generator().manual_seed(42)
        )
        self.num_classes = len(train_base.classes)
        self.classes = train_base.classes

    def train_dataloader(self):
        # shuffle + drop_last for stable batch statistics during training.
        # persistent_workers requires num_workers > 0 (True with 0 raises).
        return DataLoader(
            self.train,
            batch_size=self.batch_size,
            drop_last=True,
            shuffle=True,
            persistent_workers=self.num_workers > 0,
            num_workers=self.num_workers,
            pin_memory=self.pin_memory
        )

    def val_dataloader(self):
        return DataLoader(
            self.val,
            batch_size=self.batch_size,
            drop_last=False,
            shuffle=False,
            persistent_workers=self.num_workers > 0,
            num_workers=self.num_workers,
            pin_memory=self.pin_memory
        )

    def test_dataloader(self):
        return DataLoader(
            self.test,
            batch_size=self.batch_size,
            drop_last=False,
            shuffle=False,
            num_workers=self.num_workers,
            pin_memory=self.pin_memory
        )
Datamodule estándar como lo habíamos estado haciendo; hice el split de train/val/test dentro del mismo.
dm = CarDataModule(ABS_PATH_DATASET, batch_size=16, train_transform=train_transform, test_transform=test_transform, pin_memory=pin_memory)

# Pull a single training batch and render it as a labelled grid.
images, labels = next(iter(dm.train_dataloader()))
print('Batch shapes -> images:', images.shape, 'labels:', labels.shape)
print(f'Classes: {dm.classes}')
print(f'Number of classes: {dm.num_classes}')

def _denorm(t):
    # invert Normalize((0.5,)*3, (0.5,)*3): [-1, 1] -> [0, 1]
    return t * 0.5 + 0.5

batch_count = images.size(0)
cols = 4
rows = math.ceil(batch_count / cols)
fig, axes = plt.subplots(rows, cols, figsize=(15, 8))
axes = axes.flatten()
for idx in range(batch_count):
    frame = _denorm(images[idx]).permute(1, 2, 0).cpu().numpy()
    axes[idx].imshow(frame)
    axes[idx].set_title(dm.classes[labels[idx].item()], fontsize=10)
    axes[idx].axis('off')
# Blank out unused grid cells.
for extra in range(idx + 1, len(axes)):
    axes[extra].axis('off')
plt.tight_layout()
plt.show()
Batch shapes -> images: torch.Size([16, 3, 128, 128]) labels: torch.Size([16]) Classes: ['matiz black', 'matiz blue', 'matiz red', 'rio black', 'rio blue', 'rio red', 'tiggo black', 'tiggo blue', 'tiggo red'] Number of classes: 9
Visualización de imágenes de training ya cargadas en el dataloader.
Definición del modelo (15 puntos)¶
TO DO: Crea un modelo con capas convolucionales para hacer la clasificación.
# Transfer learning: ImageNet-pretrained ResNet-18 as the backbone.
resnet_model = torch.hub.load("pytorch/vision", "resnet18", weights="IMAGENET1K_V1")
# Freeze every pretrained parameter...
for param in resnet_model.parameters():
    param.requires_grad = False
# ...then unfreeze the deepest residual stage so it can adapt to this domain.
resnet_model.layer4.requires_grad_(True)
# Replace the 1000-way ImageNet head with a dropout-regularised classifier
# sized for this dataset (new modules are trainable by default).
resnet_model.fc = torch.nn.Sequential(
    torch.nn.Dropout(0.3),
    torch.nn.Linear(512, dm.num_classes)
)
resnet_model.fc
Se utilizará el modelo de resnet18 ya que he tenido buena experiencia para clasificación multiclase de imágenes en las que no se necesita capturar gran detalle de ellas, por lo que las imágenes pueden ser redimensionadas a una escala no muy grande para lograr una buena velocidad de entrenamiento
class CarModelLightning(L.LightningModule):
    """LightningModule wrapping a multiclass car model+colour classifier.

    Args:
        model: Backbone mapping (B, 3, H, W) images to (B, num_classes) logits.
        learning_rate: Learning rate for the RMSprop optimizer.
        num_classes: Number of target classes for the accuracy metrics.
    """

    def __init__(self, model, learning_rate, num_classes):
        super().__init__()
        self.learning_rate = learning_rate
        self.model = model
        # One accuracy metric per split so their internal states don't mix.
        self.train_acc = torchmetrics.Accuracy(task="multiclass", num_classes=num_classes)
        self.val_acc = torchmetrics.Accuracy(task="multiclass", num_classes=num_classes)
        self.test_acc = torchmetrics.Accuracy(task="multiclass", num_classes=num_classes)

    def forward(self, x):
        return self.model(x)

    def _shared_step(self, batch):
        # Common forward / loss / hard-prediction logic for all three steps.
        features, true_labels = batch
        logits = self(features)
        loss = F.cross_entropy(logits, true_labels)
        predicted_labels = torch.argmax(logits, dim=1)
        return loss, true_labels, predicted_labels

    def training_step(self, batch, batch_idx):
        loss, true_labels, predicted_labels = self._shared_step(batch)
        self.log("train_loss", loss)
        self.train_acc(predicted_labels, true_labels)
        self.log("train_acc", self.train_acc, prog_bar=True, on_epoch=True, on_step=False)
        return loss

    def validation_step(self, batch, batch_idx):
        loss, true_labels, predicted_labels = self._shared_step(batch)
        self.log("val_loss", loss, prog_bar=True)
        self.val_acc(predicted_labels, true_labels)
        self.log("val_acc", self.val_acc, prog_bar=True)

    def test_step(self, batch, batch_idx):
        # Lightning already runs test steps under no_grad/eval mode, so the
        # original explicit torch.no_grad() wrapper was redundant. Also log
        # the test loss instead of discarding it, mirroring validation_step.
        loss, true_labels, predicted_labels = self._shared_step(batch)
        self.log("test_loss", loss)
        self.test_acc(predicted_labels, true_labels)
        self.log("test_acc", self.test_acc)

    def configure_optimizers(self):
        return torch.optim.RMSprop(self.parameters(), lr=self.learning_rate)
Wrapper del modelo con Lightning, en donde se definen los steps en métodos de la clase abstracta de Lightning
Entrenamiento (5 puntos)¶
TO DO: Entrena el modelo, y muestra los resultados de validación y entrenamiento.
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.loggers import CSVLogger
# Wrap the fine-tuned backbone in the LightningModule.
car_model_lightning = CarModelLightning(
    model=resnet_model,
    learning_rate=0.001,
    num_classes=dm.num_classes
)
print(f"device={device}, accelerator={accelerator}, devices={devices}")
trainer = L.Trainer(
    accelerator=accelerator,
    # BUG FIX: the original passed `devices or 0`, which resolves to 0 on
    # the CPU path (devices=None) — Lightning rejects devices=0. Use
    # "auto" so the Trainer picks a valid device count itself.
    devices=devices if devices is not None else "auto",
    logger=CSVLogger(save_dir="logs/", name="car-classifier-resnet18"),
    max_epochs=20,
    # Stop once val_loss has not improved for 5 consecutive epochs.
    callbacks=[EarlyStopping(monitor="val_loss", mode="min", patience=5)],
)
💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry. GPU available: True (mps), used: True TPU available: False, using: 0 TPU cores
device=mps, accelerator=mps, devices=1
Se crea el trainer al que se le pasa el modelo, se configura el logger, y el callback de earlystopping
trainer.fit(model=car_model_lightning, datamodule=dm)
| Name | Type | Params | Mode --------------------------------------------------------- 0 | model | ResNet | 11.2 M | train 1 | train_acc | MulticlassAccuracy | 0 | train 2 | val_acc | MulticlassAccuracy | 0 | train 3 | test_acc | MulticlassAccuracy | 0 | train --------------------------------------------------------- 8.4 M Trainable params 2.8 M Non-trainable params 11.2 M Total params 44.725 Total estimated model params size (MB) 73 Modules in train mode 0 Modules in eval mode
Epoch 7: 100%|██████████| 119/119 [00:03<00:00, 36.78it/s, v_num=4, val_loss=0.445, val_acc=0.893, train_acc=0.960]
Se puede ver que el entrenamiento para en la época 7 por la política del earlystopper.
# Load the CSVLogger output and plot per-epoch accuracy and loss curves.
# groupby(epoch).mean() merges the separate train/val rows of each epoch.
metrics = pd.read_csv(f"{trainer.logger.log_dir}/metrics.csv")
df_epochs = metrics.groupby('epoch').mean()

fig, axes = plt.subplots(1, 2, figsize=(15, 5))
acc_ax, loss_ax = axes[0], axes[1]

acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.plot(df_epochs['train_acc'], label="Train acc")
acc_ax.plot(df_epochs['val_acc'], label="Val acc")
acc_ax.set_title("Training and Validation Accuracy")
acc_ax.legend(loc='lower right')

loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.plot(df_epochs['train_loss'], label="Train loss")
loss_ax.plot(df_epochs['val_loss'], label="Val loss")
loss_ax.set_title("Training and Validation Loss")
loss_ax.legend(loc='upper right')

plt.tight_layout()
plt.show()
Se llegó a una accuracy muy alta, alcanzando el 89% en validación y el 96% en training, y también un loss muy bajo.
Considero que el modelo quedó ligeramente overfitteado, ya que hay una diferencia de 7% entre training y validation
# Evaluate on the held-out test split; Lightning reloads the checkpoint
# saved during fit before running (see the logged "Restoring states" line).
trainer.test(datamodule=dm)
Restoring states from the checkpoint path at logs/car-classifier-resnet18/version_4/checkpoints/epoch=7-step=952.ckpt Loaded model weights from the checkpoint at logs/car-classifier-resnet18/version_4/checkpoints/epoch=7-step=952.ckpt Loaded model weights from the checkpoint at logs/car-classifier-resnet18/version_4/checkpoints/epoch=7-step=952.ckpt
Testing DataLoader 0: 100%|██████████| 26/26 [00:00<00:00, 53.76it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Test metric DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
test_acc 0.8613138794898987
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Test metric DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
test_acc 0.8613138794898987
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
[{'test_acc': 0.8613138794898987}]
En testing conseguimos una accuracy de 86%, resultado de que el modelo está ligeramente overfitteado, lo ideal sería que ésta sea igual a la de validación
Predicción (5 puntos)¶
TO DO: Descarga una imagen, y haz una predicción sobre ella. Usa wget como en el siguiente ejemplo.
!wget -O ~/src/tec/semestre_7_local/ds_ai_ml_models/DL_Data_sets/DL_Data_sets-main/cars-manual-testing/matiz_rojo.jpg https://i.pinimg.com/736x/83/90/e7/8390e7fb457d2b87f98483982ebe4a62.jpg
!wget -O ~/src/tec/semestre_7_local/ds_ai_ml_models/DL_Data_sets/DL_Data_sets-main/cars-manual-testing/kia_rio_rojo.jpg https://cdn.pixabay.com/photo/2018/04/09/22/07/car-3305699_1280.jpg
!wget -O ~/src/tec/semestre_7_local/ds_ai_ml_models/DL_Data_sets/DL_Data_sets-main/cars-manual-testing/kia_rio_azul.jpg https://upload.wikimedia.org/wikipedia/commons/thumb/d/d5/KIA_Rio_-_CIAS_2012_%286787393208%29.jpg/640px-KIA_Rio_-_CIAS_2012_%286787393208%29.jpg
!wget -O ~/src/tec/semestre_7_local/ds_ai_ml_models/DL_Data_sets/DL_Data_sets-main/cars-manual-testing/kia_rio_negro.jpeg https://upload.wikimedia.org/wikipedia/commons/thumb/6/60/Kia_YB_Rio_Hatch.jpeg/640px-Kia_YB_Rio_Hatch.jpeg
!wget -O ~/src/tec/semestre_7_local/ds_ai_ml_models/DL_Data_sets/DL_Data_sets-main/cars-manual-testing/tiggo_negro.jpg https://upload.wikimedia.org/wikipedia/commons/thumb/5/5b/Chery_Tiggo_DR_Edition_IMG001.jpg/640px-Chery_Tiggo_DR_Edition_IMG001.jpg
!wget -O ~/src/tec/semestre_7_local/ds_ai_ml_models/DL_Data_sets/DL_Data_sets-main/cars-manual-testing/tiggo_rojo.jpg https://upload.wikimedia.org/wikipedia/commons/thumb/b/ba/Chery_Tiggo_facelift_II_China_2012-05-12.jpg/640px-Chery_Tiggo_facelift_II_China_2012-05-12.jpg
!wget -O ~/src/tec/semestre_7_local/ds_ai_ml_models/DL_Data_sets/DL_Data_sets-main/cars-manual-testing/matiz_negro.jpg https://upload.wikimedia.org/wikipedia/commons/thumb/3/32/Matiz_017.jpg/640px-Matiz_017.jpg
!wget -O ~/src/tec/semestre_7_local/ds_ai_ml_models/DL_Data_sets/DL_Data_sets-main/cars-manual-testing/matiz_azul.jpg https://upload.wikimedia.org/wikipedia/commons/thumb/9/91/Daewoo_Matiz_%28front%29%2C_Denpasar.jpg/640px-Daewoo_Matiz_%28front%29%2C_Denpasar.jpg
Importante cambiar la ruta de destino de las descargas en caso de querer probar de forma local
dato curioso: me banearon de pinterest al estar copiando links de descargas, yo creo que vieron mi actividad sospechosa
# Preview the hand-downloaded internet images before predicting on them.
manual_test_images = os.listdir(ABS_PATH_MANUAL_TESTING_DATASET)
n_imgs = len(manual_test_images)
cols = 4
rows = math.ceil(n_imgs / cols)
fig, axes = plt.subplots(rows, cols, figsize=(15, 8))
axes = axes.flatten() if n_imgs > 1 else [axes]
for i, img_name in enumerate(manual_test_images):
    # OpenCV decodes to BGR; convert to RGB for matplotlib display.
    bgr = cv2.imread(os.path.join(ABS_PATH_MANUAL_TESTING_DATASET, img_name))
    axes[i].imshow(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
    axes[i].set_title(img_name, fontsize=10)
    axes[i].axis('off')
# Blank out unused grid cells.
for j in range(i + 1, len(axes)):
    axes[j].axis('off')
plt.show()
Imágenes descargadas de internet
from PIL import Image

# Run the trained classifier on each downloaded image and show predictions.
car_model_lightning.to(device).eval()
n_imgs = len(manual_test_images)
cols = 4
rows = math.ceil(n_imgs / cols)
fig, axes = plt.subplots(rows, cols, figsize=(15, 8))
axes = axes.flatten() if n_imgs > 1 else [axes]
for i, img_name in enumerate(manual_test_images):
    img_path = os.path.join(ABS_PATH_MANUAL_TESTING_DATASET, img_name)
    # BGR -> RGB, then through the same deterministic eval preprocessing
    # used by the val/test dataloaders (expects a PIL image).
    rgb = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
    batch = test_transform(Image.fromarray(rgb)).unsqueeze(0).to(device)
    with torch.no_grad():
        scores = car_model_lightning(batch)
    pred_class = dm.classes[int(scores.argmax(dim=1).cpu().item())]
    axes[i].imshow(rgb)
    axes[i].set_title(f"Prediction: {pred_class}\nLabel: {img_name}", fontsize=10)
    axes[i].axis('off')
# Blank out unused grid cells.
for j in range(i + 1, len(axes)):
    axes[j].axis('off')
plt.tight_layout()
plt.show()
Probando con las imágenes descargadas de internet, en todos los casos el modelo predijo la clase correcta. Esto se lo atribuyo al dataset, que, por ejemplo comparado con el de las señales alemanas, tiene mejor calidad, una distribución más pareja y, sobre todo, encuadres de los coches muy variados, lo que hace al modelo más versátil y capaz de clasificar fotos más realistas de coches fuera de un ambiente controlado.